1. Data wrangling

1.1 ACS data

In this assignment, the year of tracts data used is 2006 and 2017, because of the limitation of Chicago building permits database.

Building permit number, medium rent, white percentage and total population are selected as indicators of TOD.

tracts09 =
  get_acs(geography = "tract", variables = c("B25026_001E","B02001_002E",
                                             "B19013_001E","B25058_001E",
                                             "B06012_002E"), 
          year=2009, state="Illinois", county="Cook County", geometry=T, output="wide") %>%
  st_transform('EPSG:3435') %>%
  rename(TotalPop = B25026_001E, Whites = B02001_002E,
         MedHHInc = B19013_001E, MedRent = B25058_001E,
         TotalPoverty = B06012_002E) %>%
  dplyr::select(-NAME, -starts_with("B")) %>%
  mutate(pctWhite = ifelse(TotalPop > 0, Whites / TotalPop,0),
         pctPoverty = ifelse(TotalPop > 0, TotalPoverty / TotalPop, 0),
         year = "2009") %>%
  dplyr::select(-Whites, -TotalPoverty) 

tracts19 = 
  get_acs(geography = "tract", variables = c("B25026_001E","B02001_002E",
                                             "B19013_001E","B25058_001E",
                                             "B06012_002E"), 
          year=2019, state="Illinois", county="Cook County", geometry=T, output="wide") %>%
  st_transform('EPSG:3435') %>%
  rename(TotalPop = B25026_001E, Whites = B02001_002E,
         MedHHInc = B19013_001E, MedRent = B25058_001E,
         TotalPoverty = B06012_002E) %>%
  dplyr::select(-NAME, -starts_with("B")) %>%
  mutate(pctWhite = ifelse(TotalPop > 0, Whites / TotalPop,0),
         pctPoverty = ifelse(TotalPop > 0, TotalPoverty / TotalPop, 0),
         year = "2019") %>%
  dplyr::select(-Whites, -TotalPoverty) 

1.2 Chicago city boundary

Due to the tract data from Cook County, Chicago tracts need to be selected.

Data source: https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Census-Tracts-2010/5jrd-6zik#revert

Chicago.tracts = 
  st_read("data_Chicago/boundary_Ch.geojson") %>%
  select(geoid10) %>% 
  rename(GEOID=geoid10) %>%
  st_transform('EPSG:3435')

boundary = Chicago.tracts %>% st_union()

tracts09.filtered = tracts09 %>% st_intersection(boundary)
tracts19.filtered = tracts19 %>% 
  inner_join(Chicago.tracts%>%st_drop_geometry(), by="GEOID")

boundary = tracts19.filtered %>% st_union()

1.3 Chicago L-train Stations

Data source: https://data.cityofchicago.org/Transportation/CTA-L-Rail-Stations-Shapefile/vmyy-m9qj

# Count the lines going through one station
# to determine city center stations.
countLines = function(t){
  all.lines = strsplit("Brown Red Yellow Orange Pink Purple Green Blue"," ")[[1]]
  line_number=0
  for (l in all.lines) {
    if (grepl(l,t)){line_number = line_number+1}
  }
  return(line_number)
}

station_Ch = st_read("data_Chicago/railStations/") %>%
  st_transform('EPSG:3435') %>% 
#  st_intersection(boundary) %>%
  rowwise()%>%
  mutate(line_number=countLines(LINES),
         is_city_center=line_number>=4) # setting city center

1.4 Building permits data

Data source: https://data.cityofchicago.org/Buildings/Building-Permits/ydr8-5enu

building_permit_Ch = 
  st_read("data_Chicago/buildingPermits_processed.geojson") %>%
  st_transform('EPSG:3435') %>%
  mutate(issue_date=as.numeric(format(issue_date,"%Y"))) %>%
  .[st_intersects(boundary)%>% lengths > 0,]

tracts09.all = tracts09.filtered %>% 
  st_join(building_permit_Ch%>%filter(issue_date ==2009),.) %>%
  st_drop_geometry()%>%
  group_by(GEOID)%>%
  summarise(BuildingPermitNumber=n())%>%
  mutate(pctBuildingPermit=BuildingPermitNumber/max(BuildingPermitNumber))%>%
  right_join(tracts09.filtered,by="GEOID")%>%
  mutate(
    BuildingPermitNumber=BuildingPermitNumber%>%replace_na(0),
    pctBuildingPermit=pctBuildingPermit%>%replace_na(0))%>%
  st_sf() %>%
  st_transform('EPSG:3435') 

tracts19.all = tracts19.filtered %>% 
  st_join(building_permit_Ch%>%filter(issue_date ==2019),.) %>%
  st_drop_geometry()%>%
  group_by(GEOID)%>%
  summarise(BuildingPermitNumber=n())%>%
  mutate(pctBuildingPermit=BuildingPermitNumber/max(BuildingPermitNumber))%>%
  right_join(tracts19.filtered,by="GEOID")%>%
  mutate(
    BuildingPermitNumber=BuildingPermitNumber%>%replace_na(0),
    pctBuildingPermit=pctBuildingPermit%>%replace_na(0))%>%
  st_sf() %>%
  st_transform('EPSG:3435') 

allTracts <- rbind(tracts09.all, tracts19.all)

2. Small-multiple visualizations

Creating buffer

buffer =
  st_union(st_buffer(station_Ch, 2640)) %>%
  st_sf() %>%
  mutate(Legend = "Unioned Buffer")

Grouping tracts by TOD

allTracts.group <- 
  rbind(
    st_centroid(allTracts)[buffer,] %>%
      st_drop_geometry() %>%
      left_join(allTracts) %>%
      st_sf() %>%
      mutate(TOD = "TOD"),
    st_centroid(allTracts)[buffer, op = st_disjoint] %>%
      st_drop_geometry() %>%
      left_join(allTracts) %>%
      st_sf() %>%
      mutate(TOD = "Non-TOD")) %>%
  mutate(MedRent.inf = ifelse(year == "2009", MedRent * 1.19, MedRent)) 

Plotting

p1 = ggplot(allTracts.group)+
  geom_sf(data=boundary,color="#222222",size=2) +
  geom_sf(aes(fill = TOD),color="white") +
  scale_fill_manual(values=myPal[c(1,6)])+
  labs(title = "TOD, Non-TOD",
       subtitle = "2009-2019") +
  facet_wrap(~year)+
  mapTheme() + theme()

p2 = ggplot(allTracts.group)+
  geom_sf(data=boundary,color="#222222",size=1) +
  geom_sf(aes(fill = q6(MedRent.inf)),size=0) +
  geom_sf(data = buffer, fill = "transparent", color = "#222222")+
  scale_fill_manual(values = myPal,
                    labels = qBr6(allTracts.group, "MedRent"),
                    name = "Rent\n(Quintile Breaks)") +
  labs(title = "Median Rent 2009-2019", subtitle = "Median Rent in Chicago") +
  facet_wrap(~year)+
  mapTheme() + theme()

p3 = ggplot(allTracts.group)+
  geom_sf(data=boundary,color="#222222",size=1) +
  geom_sf(aes(fill = q6(BuildingPermitNumber)),size=0) +
  geom_sf(data = buffer, fill = "transparent", color = "#222222")+
  scale_fill_manual(values = myPal,
                    labels = qBr6(allTracts.group, "BuildingPermitNumber"),
                    name = "Number\n(Quintile Breaks)") +
  labs(title = "Building Permits Number 2009-2019", 
       subtitle = "Chicago building permits") +
  facet_wrap(~year)+
  mapTheme() + theme()

p4 = ggplot(allTracts.group)+
  geom_sf(data=boundary,color="#222222",size=1) +
  geom_sf(aes(fill = q6(pctWhite)),size=0) +
  geom_sf(data = buffer, fill = "transparent", color = "#222222")+
  scale_fill_manual(values = myPal,
                    labels = qBr6(allTracts.group, "pctWhite",F),
                    name = "pctWhite\n(Quintile Breaks)") +
  labs(title = "White percentage 2009-2019", subtitle = "Chicago white percentage") +
  facet_wrap(~year)+
  mapTheme() + theme()

p5 = ggplot(allTracts.group)+
  geom_sf(data=boundary,color="#222222",size=1) +
  geom_sf(aes(fill = q6(TotalPop)),size=0) +
  geom_sf(data = buffer, fill = "transparent", color = "#222222")+
  scale_fill_manual(values = myPal,
                    labels = qBr6(allTracts.group, "TotalPop"),
                    name = "Population\n(Quintile Breaks)") +
  labs(title = "Population 2009-2019", subtitle = "Total Population of Chicago") +
  facet_wrap(~year)+
  mapTheme() + theme()
p1

((p2+p3)/(p5+p4))

The map suggests that although rents, building permits number and population increased in Chicago’s central business district, many areas close to transit did not see significant changes.

3. Grouped bar plot

Summary data

allTracts.Summary = 
  st_drop_geometry(allTracts.group) %>%
  group_by(year, TOD) %>%
  summarize(Rent = mean(MedRent, na.rm = T),
            Population = mean(TotalPop, na.rm = T),
            Percent_White = mean(pctWhite, na.rm = T),
            Building_Permit_n = mean(BuildingPermitNumber, na.rm = T))

Bar

allTracts.Summary %>%
  gather(Variable, Value, -year, -TOD) %>%
  ggplot(aes(year, Value, fill = TOD)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~Variable, scales = "free", ncol=5) +
  scale_fill_manual(values = myPal[c(2,6)]) +
  labs(title = "Indicator differences across time and space") +
  plotTheme() + theme(legend.position="bottom")

Between 2009 and 2019, rents in Chicago increased slightly, and building permits number goes up. It appears that the government allows more development around TOD area, and more residents live near transit stations. Also, residents are willing to pay more for transit access.

Table

allTracts.Summary %>%
  unite(year.TOD, year, TOD, sep = ": ", remove = T) %>%
  gather(Variable, Value, -year.TOD) %>%
  mutate(Value = round(Value, 2)) %>%
  spread(year.TOD, Value) %>%
  kable() %>%
  kable_styling() %>%
  footnote(general_title = "\n",
           general = "Table 1")
Variable 2009: Non-TOD 2009: TOD 2019: Non-TOD 2019: TOD
Building_Permit_n 38.12 49.82 48.67 74.72
Percent_White 0.43 0.45 0.45 0.52
Population 3776.04 2568.28 3524.27 3034.67
Rent 729.34 797.67 912.29 1117.97

Table 1

4. Sub-markets

Defining 3 sub-markets: center city, normal TOD area, and non-TOD area.

centerCity =
  st_buffer(filter(station_Ch, is_city_center), 2640) %>% 
  st_union() %>% st_sf() %>%
  mutate(Submarket = "Center City")

aroundStation =
  st_buffer(filter(station_Ch, !is_city_center), 2640) %>% 
  st_union() %>% st_sf() %>%
  st_difference(centerCity) %>%
  mutate(Submarket = "Around Station")

Markets = rbind(centerCity, aroundStation)

allTracts.Markets =
  st_join(st_centroid(allTracts), Markets) %>%
  st_drop_geometry() %>%
  left_join(allTracts) %>%
  mutate(Submarket = replace_na(Submarket, "Non-TOD")) %>%
  st_sf() 
ggplot(allTracts.Markets)+
  geom_sf(data=boundary,color="#222222",size=1) +
  geom_sf(aes(fill = Submarket),color="white",size=0.15) +
  labs(title = "Submarkets",subtitle = "Time/Space Groups") +
  scale_fill_manual(values=myPal[c(4,6,1)])+
  facet_wrap(~year)+
  mapTheme() + theme()

allTracts.Markets.Summary <- 
  st_drop_geometry(allTracts.Markets) %>%
  group_by(year, Submarket) %>%
  summarize(Rent = mean(MedRent, na.rm = T),
            Population = mean(TotalPop, na.rm = T),
            Percent_White = mean(pctWhite, na.rm = T),
            Building_Permit_n = mean(BuildingPermitNumber, na.rm = T))

kable(allTracts.Markets.Summary) %>%
  kable_styling() %>%
  footnote(general_title = "\n",
           general = "Table 2")
year Submarket Rent Population Percent_White Building_Permit_n
2009 Around Station 779.7214 2550.769 0.4319457 39.17778
2009 Center City 1477.6667 3198.600 0.9960491 432.80000
2009 Non-TOD 729.3407 3776.044 0.4302178 38.11922
2019 Around Station 1088.3061 2941.728 0.5103028 57.47651
2019 Center City 2087.0000 6112.111 0.8529753 645.77778
2019 Non-TOD 912.2910 3524.267 0.4527128 48.66802

Table 2
allTracts.Markets.Summary %>%
  gather(Variable, Value, -year, -Submarket) %>%
  ggplot(aes(year, Value, fill = Submarket)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values=myPal[c(4,6,1)])+
  facet_wrap(~Variable, scales = "free",ncol = 4) +
  labs(title = "Indicator differences across time and space") +
  plotTheme() + theme(legend.position="bottom")

All four indicators in Center City may have been driving the results above, thus, there is no clear evidence that residents value TOD.

5. Graduated symbol maps

buffer_half_mile =
  st_buffer(station_Ch, 5280) %>%
  st_sf() %>% select(geometry) %>%
  rowid_to_column("BufferID")

allTracts.gsm = st_centroid(allTracts) %>%
  select(GEOID,geometry) %>%
  st_join(buffer_half_mile) %>%
  st_drop_geometry()%>%
  left_join(allTracts,by="GEOID")%>%
  group_by(BufferID,year)%>%
  summarise(popMean=mean(TotalPop,na.rm=T),
            rentMean=mean(MedRent,na.rm=T))%>%
  left_join(st_centroid(buffer_half_mile),
            by="BufferID")%>%
  drop_na()%>%
  st_sf()
ggplot(data=allTracts.gsm)+
  geom_sf(data=boundary,color="#222222") +
  geom_sf(aes(fill=q6(popMean),size=q6(popMean)),
          shape=21,alpha=0.6)+
  scale_size_manual(values = seq(1.5,4,length.out=6),
                    labels = qBr6(allTracts.gsm, "popMean"),
                    name = "Population")+
  scale_fill_manual(values = myPal,
                    labels = qBr6(allTracts.gsm, "popMean"),
                    name = "Population")+
  labs(title = "Mean population within 0.5 mile of station", subtitle = "Graduated symbol map") +
  facet_wrap(~year)+mapTheme()

ggplot(data=allTracts.gsm)+
  geom_sf(data = boundary,color="#222222")+
  geom_sf(aes(fill=q6(rentMean),size=q6(rentMean)),
          shape=21,alpha=0.6)+
  scale_size_manual(values = seq(1.5,4,length.out=6),
                    labels = qBr6(allTracts.gsm, "rentMean"),
                    name = "rentMean")+
  scale_fill_manual(values = myPal,
                    labels = qBr6(allTracts.gsm, "rentMean"),
                    name = "rentMean")+
  labs(title = "Mean rent within 0.5 mile of station", subtitle = "Graduated symbol map") +
  facet_wrap(~year)+mapTheme()

  1. There is a tendency that the population moves from stations in urban fringe to stations in the city center.
  2. The rents around the stations in the city center grows significantly.

6. geom_line plot

# This function runs faster on my computer
new.MRB = function(points,distance){
  # points: sf
  # distance: vector of integer
  # output: sf
  allRings = st_sf(st_sfc(),crs=st_crs(points))
  for (d in distance){
    buffer = points %>% st_union() %>%
      st_buffer(d) %>% st_union() %>%
      st_difference(st_union(allRings))
    temp = st_sf(distance=d, geometry=buffer)
    allRings = rbind(allRings,temp)
  }
  return(allRings)
}
MRB = new.MRB(station_Ch,1:15*2640)

allTracts.rings =
  st_join(st_centroid(dplyr::select(allTracts, GEOID, year)), MRB) %>%
  st_drop_geometry() %>%
  left_join(dplyr::select(allTracts, GEOID, MedRent, year),
            by=c("GEOID"="GEOID", "year"="year")) %>%
  select(-geometry) %>%
  mutate(distance = distance / 5280) #convert to miles

rings.summary = allTracts.rings%>%
  group_by(year,distance)%>%
  summarise(meanRent = mean(MedRent,na.rm=T),
            number = n())
p1 = ggplot(rings.summary)+
  geom_line(aes(distance,meanRent,color = year))+
  scale_color_manual(values = myPal[c(2,5)],
                     name = "Year")+
  labs(title = "Rent as a function of distance to subway stations",
       subtitle = "Chicago, 2009-2019",
       x="",y="Mean rent ($)")+
  plotTheme()

p2 = ggplot(rings.summary)+
  geom_col(aes(distance,number,group=year,fill = year),position = "dodge")+
  scale_fill_manual(values = myPal[c(2,5)],
                    name = "Year")+
  labs(x="Distance to station (mile)",y="Number")+
  plotTheme()
  
p1/p2+plot_layout(heights = c(2, 1))

  1. Rents seem to be higher in areas close to transit relative to places at greater distances.

  2. Residents value TOD more in 2019 than 2009.

  3. There is a rise around 5 miles, but the number of sample is too small to make rational speculations.

7. Crime, Transit access and Rents

Data: Crime data, 2019, Chicago

Source: https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-Present/ijzp-q8t2

crime.all = read_csv("./data_Chicago/crimeCh2019.csv")
crime.theft = 
  crime.all %>%
  filter(`Primary Type`=="THEFT")  %>%
  drop_na(Latitude,Longitude)%>%
  st_as_sf(coords=c("Longitude","Latitude"),crs=4326) %>%
  select(Date) %>%
  rename(date = Date) %>%
  rowid_to_column("crimeID") %>%
  st_transform('EPSG:3435')
tracts19.distance = tracts19.all %>%
  mutate(
    closestPT=st_geometry(station_Ch[st_nearest_feature(st_centroid(geometry),station_Ch),"geometry"]),
    distance_to_st = as.numeric(st_distance(st_centroid(geometry),closestPT,by_element = TRUE)) / 5280
  )

tracts19.crime = 
  tracts19.distance %>%
  st_join(crime.theft,left = T) %>%
  group_by(GEOID) %>%
  summarise(theftNumber=n()) %>% 
  st_drop_geometry()%>%
  left_join(tracts19.distance,.,by="GEOID") 

tracts19.crime.bin = 
  tracts19.crime%>%
  mutate(dis = qn(distance_to_st,20),
         rent = qn(MedRent,20))

tracts19.crime.bin.a = 
  tracts19.crime.bin %>%
  group_by(dis)%>%
  summarise(rent=mean(MedRent,na.rm=T),
            theftNumber=mean(theftNumber,na.rm=T))

tracts19.crime.bin.b = 
  tracts19.crime.bin %>%
  drop_na(rent)%>%
  group_by(rent) %>%
  summarise(dis=mean(distance_to_st,na.rm=T),
            theftNumber=median(theftNumber,na.rm=T))
p1 = ggplot(tracts19.crime.bin.a)+
  geom_col(aes(dis,theftNumber),fill=myPal2[1])+
  labs(title = "Theft number as a function of distance to subway stations",
       subtitle = "Chicago, 2019",
       x = "Distance(quantile)",y="Average theft number")+
  plotTheme()
  
p2 = ggplot(tracts19.crime.bin.b)+
  geom_col(aes(rent,theftNumber),fill=myPal2[1])+
  labs(title = "Theft number as a function of rent",
       subtitle = "Chicago, 2019",
       x = "Rent(quantile)",y="Average theft number")+
  plotTheme()

p1/p2

p3 = ggplot(tracts19.crime.bin,aes(dis,theftNumber))+
  geom_boxplot(outlier.color = myPal[1])+
  labs(title = "Outliers in theft number",
       subtitle = "Chicago, 2019",
       x = "Distance(quantile)",y="Theft number")+
  plotTheme()

p4 = ggplot(tracts19.crime.bin,aes(rent,theftNumber))+
  geom_boxplot(outlier.color = myPal[1])+
  labs(title = "Outliers in theft number",
       subtitle = "Chicago, 2019",
       x = "Rent(quantile)",y="Theft number")+
  plotTheme()

p3/p4

ggplot(tracts19.crime)+
  geom_point(aes(distance_to_st,MedRent,
                 size = q6(theftNumber),
                 color = q6(theftNumber)),alpha=.8)+
  scale_color_manual(values = rev(myPal),
                    labels = qBr6(tracts19.crime, "theftNumber"),
                    name = "Theft Number\nof one tract\n(quantile)")+
  scale_size_manual(values = 1:6*.8,
                    labels = qBr6(tracts19.crime, "theftNumber"),
                    name = "Theft Number\nof one tract\n(quantile)")+
  labs(title = "Theft number as a function of rent and distance",
       subtitle = "Chicago, 2019",
       x = "Distance to stations (mile)",y="Rent ($)")+
  plotTheme()

  1. Overall, there are slightly more thefts around the stations, and extremely high-value area is more likely to be stolen.

  2. However, the outliers in both cases cannot be neglected. One possible reason for that is areas around city center have higher theft rates.

Conclusion

  1. The Center City effect in Chicago is causing rapid appreciation of land in city center, growing population density and increasing development number.

  2. Residents don’t seem to value TOD than 10 years ago. At this point, seems to have had little effect on people’s housing choices. Thus, Governors should review existing policies for better results.

  3. However, the analysis above is limited to the L-Train system in Chicago, other transit systems like bus also have a lot of traffic, so bias is inevitable here.